import os
import glob
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.express as px
dataElecGenByFuel = pd.read_csv("C:/Users/kyled/OneDrive/Documents/Georgian/F22/CSVFiles/Elec_generation_by_fuel.csv", sep=",")
elecGenByFuel_df1 = pd.DataFrame(dataElecGenByFuel).set_index('Country').replace({'-':None}).replace({'^':None})
dataGenByYear = pd.read_csv("C:/Users/kyled/OneDrive/Documents/Georgian/F22/CSVFiles/countries-years/ElectricityGeneration-TWH.csv", sep=",")
import numpy as np
import pandas as pd
import plotly.offline as py
import plotly.graph_objs as go
py.init_notebook_mode()
dataGenByYear = pd.read_csv("C:/Users/kyled/OneDrive/Documents/Georgian/F22/CSVFiles/countries-years/ElectricityGeneration-TWH.csv", sep=",")
GenByYear_df1 = pd.DataFrame(dataGenByYear).rename(columns=dataGenByYear.iloc[1]).set_index('Country').dropna().drop(columns=['2021', '2011-21','2021']).replace({'-':None}).replace({'^':None}).drop(index=["Country"])
GenByYear_df1=GenByYear_df1.transpose()
py.iplot([{
'x': GenByYear_df1.index,
'y': GenByYear_df1[col],
'name': col
} for col in GenByYear_df1.columns], filename='cufflinks/multiple-lines-on-same-chart')
| 1985 | 1986 | 1987 | 1988 | 1989 | 1990.0 | 1991.0 | 1992.0 | 1993.0 | 1994.0 | ... | 2012.0 | 2013.0 | 2014.0 | 2015.0 | 2016.0 | 2017.0 | 2018.0 | 2019.0 | 2020.0 | 2021.0 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Country | |||||||||||||||||||||
| Canada | 459 | 468.6 | 496.4 | 506 | 498.1 | 480.6 | 507.0 | 518.6 | 530.2 | 553.1 | ... | 636.5 | 655.7 | 647.6 | 659.3 | 663.7 | 664.5 | 655.8 | 650.8 | 649.1 | 641.0 |
| Mexico | 96.2 | 99.6 | 106.2 | 112.2 | 120 | 117.6 | 128.6 | 132.3 | 134.6 | 146.1 | ... | 296.4 | 297.1 | 303.3 | 310.3 | 319.4 | 325.1 | 335.1 | 344.6 | 325.7 | 336.0 |
| US | 2657.2 | 2676.1 | 2772.2 | 2914.4 | 3155.4 | 3232.8 | 3270.7 | 3284.4 | 3404.7 | 3458.5 | ... | 4310.6 | 4330.3 | 4363.3 | 4348.7 | 4347.9 | 4302.5 | 4461.6 | 4411.2 | 4284.8 | 4406.4 |
| Total North America | 3212.4 | 3244.3 | 3374.8 | 3532.7 | 3773.6 | 3831.0 | 3906.3 | 3935.3 | 4069.6 | 4157.7 | ... | 5243.5 | 5283.1 | 5314.2 | 5318.4 | 5331.1 | 5292.2 | 5452.5 | 5406.5 | 5259.7 | 5383.5 |
| Argentina | 45.3 | 49 | 52.1 | 52.5 | 50.9 | 51.0 | 53.9 | 56.1 | 61.9 | 65.6 | ... | 136.0 | 139.4 | 138.6 | 145.4 | 147.2 | 145.6 | 146.8 | 139.5 | 144.6 | 152.5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Total Asia Pacific | 1723.9 | 1815.7 | 1971.4 | 2123.8 | 2287.7 | 2469.2 | 2635.8 | 2780.2 | 2918.6 | 3176.0 | ... | 9278.3 | 9815.0 | 10335.8 | 10440.9 | 10951.1 | 11575.6 | 12372.4 | 12783.7 | 12949.3 | 13994.4 |
| Total World | 9883.2 | 10178 | 10667.9 | 11138 | 11657 | 11961.1 | 12222.7 | 12335.9 | 12599.4 | 12923.8 | ... | 22817.5 | 23452.4 | 24049.8 | 24292.0 | 24924.2 | 25647.7 | 26677.3 | 27036.6 | 26889.2 | 28466.3 |
| of which: OECD | 6560.3 | 6667 | 6948.7 | 7232.1 | 7605.3 | 7812.1 | 7986.6 | 8047.5 | 8229.2 | 8451.7 | ... | 11022.8 | 11014.8 | 10955.2 | 11004.1 | 11082.2 | 11124.4 | 11310.9 | 11191.3 | 10900.7 | 11210.2 |
| Non-OECD | 3322.8 | 3511 | 3719.2 | 3905.9 | 4051.7 | 4149.0 | 4236.1 | 4288.4 | 4370.2 | 4472.1 | ... | 11794.7 | 12437.7 | 13094.7 | 13287.9 | 13842.0 | 14523.4 | 15366.4 | 15845.3 | 15988.5 | 17256.1 |
| European Union # | 2022.1 | 2075.4 | 2147.4 | 2197.5 | 2248.4 | 2274.1 | 2317.7 | 2303.4 | 2303.8 | 2339.2 | ... | 2931.4 | 2912.1 | 2849.3 | 2897.9 | 2919.1 | 2951.4 | 2935.3 | 2894.0 | 2779.0 | 2895.3 |
102 rows × 37 columns
dataBioGenByYear = pd.read_csv("C:/Users/kyled/OneDrive/Documents/Georgian/F22/CSVFiles/countries-years/BiofuelsProduction-PJ.csv", sep=",")
BioGenByYear_df1 = pd.DataFrame(dataBioGenByYear).rename(columns=dataBioGenByYear.iloc[1]).set_index('Country').dropna().drop(columns=['2021', '2011-21','2021']).replace({'-':None}).replace({'^':None}).drop(index=["Country"])
frames = [GenByYear_df1, BioGenByYear_df1]
result = pd.concat(frames, keys=["ElectricityGenerationTWH", "BiofuelsProductionPJ"])
result
| Canada | Mexico | US | Total North America | Argentina | Brazil | Chile | Colombia | Ecuador | Peru | ... | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ElectricityGenerationTWH | 1985 | 459 | 96.2 | 2657.2 | 3212.4 | 45.3 | 193.7 | 14 | 29.4 | 4.6 | 12.1 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1986 | 468.6 | 99.6 | 2676.1 | 3244.3 | 49 | 202.1 | 14.8 | 31.7 | 5 | 12.9 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
| 1987 | 496.4 | 106.2 | 2772.2 | 3374.8 | 52.1 | 203.3 | 15.6 | 33.7 | 5.4 | 14 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
| 1988 | 506 | 112.2 | 2914.4 | 3532.7 | 52.5 | 215 | 16.9 | 35.5 | 5.6 | 13.6 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
| 1989 | 498.1 | 120 | 3155.4 | 3773.6 | 50.9 | 221.7 | 17.8 | 37 | 5.8 | 13.7 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| BiofuelsProductionPJ | Asia Pacific | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 161.6 | 204.3 | 248.6 | 308.7 | 227.5 | 288 | 287.3 | 403.4 | 529.7 | 515.4 |
| Total World | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 841.6 | 915.5 | 1024.7 | 1167.2 | 1068.3 | 1191.2 | 1282.7 | 1493.1 | 1628.2 | 1595.5 | |
| of which: OECD | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 511.7 | 542.4 | 612.9 | 661 | 656.1 | 686.7 | 752.4 | 835 | 832.7 | 831.4 | |
| Non-OECD | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 329.9 | 373.1 | 411.8 | 506.1 | 412.3 | 504.6 | 530.3 | 658.1 | 795.5 | 764.1 | |
| European Union | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 347.5 | 371 | 395.7 | 449.8 | 445.6 | 429.9 | 489.3 | 525.5 | 542.2 | 533.3 |
101 rows × 133 columns
fmask = 'C:/Users/kyled/OneDrive/Documents/Georgian/F22/CSVFiles/countries-years/*.csv*'
DataFrames={}
fileNames={}
for f in glob.glob(fmask):
fileNames[f] = f
data = pd.read_csv(f, sep=',', encoding= 'unicode_escape')
DataFrames[f] = pd.DataFrame(data).rename(columns=data.iloc[1]).set_index('Country').dropna().replace({'-':0}).replace({'^':.005}).replace({'?':.05}).drop(index=["Country"])
fnStringArray = []
for fn in fileNames:
fnStringArray.append(str(fn.replace("C:/Users/kyled/OneDrive/Documents/Georgian/F22/CSVFiles/countries-years\\", "")))
dfValueArray = list(DataFrames.values())
result = pd.DataFrame({'idx':fnStringArray, 'dfs':dfValueArray})
n=-1
for r in result['dfs']:
n+=1
print(n)
with pd.ExcelWriter('C:/Users/kyled/OneDrive/Documents/Georgian/F22/IntroToDataAnalytics-COMP2099/final/cleanedData.xlsx', mode="a", engine="openpyxl",if_sheet_exists="replace",) as writer:
result['dfs'].iloc[n].to_excel(writer, sheet_name=result['idx'].iloc[n])
0
C:\Users\kyled\anaconda3\lib\site-packages\openpyxl\workbook\child.py:99: UserWarning: Title is more than 31 characters. Some applications may not be able to read the file
warnings.warn("Title is more than 31 characters. Some applications may not be able to read the file")
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
import os
import glob
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.express as px
headMapData = pd.read_csv(r'C:\Users\kyled\Downloads\canada.2002-2008.monthlyAvgGHI.csv', sep=',', encoding= 'unicode_escape', low_memory=False)
heatMapDataFrame = pd.DataFrame(headMapData)
heatMapDataFrame
fig = px.density_mapbox(heatMapDataFrame, lat='latitude', lon='longitude', z='Avg Global', radius=1,
center=dict(lat=52.75, lon=-90), zoom=2,
mapbox_style="stamen-terrain")
fig.show()